{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "private_outputs": true,
      "provenance": [],
      "authorship_tag": "ABX9TyNkWmtVrv/giQ6PEPfzoAVw",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/ktynski/Marketing_Automations_Notebooks_With_GPT/blob/main/Automatic_Reddit_Trend_Research_with_GPT3_(Public).ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "1. Install dependencies"
      ],
      "metadata": {
        "id": "Yqy2k5CrOlto"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Colab setup: %pip installs into the environment of the running kernel.\n",
        "# openai is held below 1.0 because this notebook calls the legacy\n",
        "# openai.Completion interface, which the 1.x client no longer provides.\n",
        "%pip install -q praw nltk \"openai<1\""
      ],
      "metadata": {
        "id": "KThkDZm2F5tv"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "2. Run the cell below to scrape Reddit for top posts and produce a readout analysis of those posts as CSVs (available in the file folder on the left after running the cell). You will need an OpenAI API key and credentials from a Reddit app, which you can create here: https://www.reddit.com/prefs/apps"
      ],
      "metadata": {
        "id": "mamHhndqOowA"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import concurrent.futures\n",
        "import os\n",
        "import time\n",
        "\n",
        "import nltk\n",
        "import openai\n",
        "import pandas as pd\n",
        "import praw\n",
        "from nltk.tokenize import word_tokenize\n",
        "\n",
        "# word_tokenize needs the Punkt models: older NLTK releases load 'punkt',\n",
        "# while NLTK 3.9+ looks up 'punkt_tab', so both resources are fetched.\n",
        "nltk.download('punkt')\n",
        "nltk.download('punkt_tab')\n",
        "\n",
        "# OpenAI API key: read from the OPENAI_API_KEY environment variable when it is\n",
        "# set; otherwise replace the placeholder below with your own key.\n",
        "openai.api_key = os.environ.get(\"OPENAI_API_KEY\", \"Your OpenAI Api Key\")\n",
        "\n",
        "# Keyword:\n",
        "keyword = \"Elon Musk\"\n",
        "#number of posts to get from reddit:\n",
        "num_results = 500\n",
        "#Enter the date range (1 day, 1 week, 1 month, 6 months, 1 year, all time):\n",
        "date_range = \"6 months\"\n",
        "\n",
        "\n",
        "# Each prompt is sent to the model separately, followed by the scraped corpus.\n",
        "prompts = [\n",
        "\"Please summarize the main topic discussed in the Reddit posts in my corpus as a short essay.\",\n",
        "\"Please extract the 20 most common key points or highlights from the the corpus that summarize the main ideas discussed.\",\n",
        "\"Please identify the 20 most common words or phrasesin the corpus and their frequency of use.\",\n",
        "\"Please find the 20 most frequently asked questions in the corpus.\",\n",
        "\"Please identify 20 important or noteworthy patterns or trends in the corpus.\",\n",
        "\"Please generate a comprehensive summary of the positive parts of the corpus in the form of bullet points.\",\n",
        "\"Please generate a comprehensive summary of the negative parts of the corpus in the form of bullet points.\",\n",
        "\"Please generate a list of 20 common questions found in the corpus, covering the main topics and issues discussed.\",\n",
        "\"Please create a hierarchy of topics/subtopics/subsubtopics encountered most frequently in the corpus.\",\n",
        "\"Please list all of entities mentioned in the corpus, sorted by most to least mentioned.\",\n",
        "\"Please suggest 10 newsworthy and clickable data-journalism story ideas titles based on what you know was popular in the corpus.\"\n",
        "]\n",
        "\n",
        "def get_posts(keyword, num_results, date_rangea):\n",
        "    \"\"\"Search r/all for `keyword` and return the matching posts as a DataFrame.\n",
        "\n",
        "    Args:\n",
        "        keyword: Search query passed to Reddit.\n",
        "        num_results: Maximum number of search results to request.\n",
        "        date_rangea: One of \"1 day\", \"1 week\", \"1 month\", \"6 months\" or\n",
        "            \"1 year\". Any other value (for example \"all time\") removes the\n",
        "            lower bound. The misspelled parameter name is kept so that\n",
        "            existing keyword callers keep working.\n",
        "\n",
        "    Returns:\n",
        "        DataFrame with the columns \"Title\", \"Timestamp\" and \"Permalink\",\n",
        "        limited to posts created inside the selected window. The same frame\n",
        "        is also written to \"redditposts.csv\".\n",
        "    \"\"\"\n",
        "    # Window lengths in seconds; months are counted as 30 days.\n",
        "    window_seconds = {\n",
        "        \"1 day\": 86400,\n",
        "        \"1 week\": 604800,\n",
        "        \"1 month\": 2592000,\n",
        "        \"6 months\": 15552000,\n",
        "        \"1 year\": 31104000,\n",
        "    }\n",
        "\n",
        "    # Connect to Reddit API using PRAW, use the credentials you get from reddit after creating an app.\n",
        "    reddit = praw.Reddit(client_id='your client id',\n",
        "                         client_secret='your client secret',\n",
        "                         user_agent='name of your reddit app')\n",
        "\n",
        "    # Use the argument itself: the previous body read the notebook-level\n",
        "    # `date_range` global, so the value passed by the caller was ignored.\n",
        "    end_date = int(time.time())\n",
        "    span = window_seconds.get(date_rangea)\n",
        "    start_date = end_date - span if span is not None else 0\n",
        "\n",
        "    # Keep only the search hits whose creation time falls inside the window.\n",
        "    posts = [\n",
        "        [post.title, post.created_utc, post.permalink]\n",
        "        for post in reddit.subreddit(\"all\").search(keyword, limit=num_results)\n",
        "        if start_date <= post.created_utc <= end_date\n",
        "    ]\n",
        "\n",
        "    # Create a Pandas DataFrame from the list of posts\n",
        "    df = pd.DataFrame(posts, columns=[\"Title\", \"Timestamp\", \"Permalink\"])\n",
        "    df.to_csv(\"redditposts.csv\")\n",
        "    return df\n",
        "\n",
        "def GPT3_evaluation(posts_df, prompt, max_corpus_tokens=2800,\n",
        "                    engine=\"text-davinci-003\", max_tokens=500, temperature=0.5):\n",
        "    \"\"\"Send one analysis prompt plus the scraped post titles to OpenAI.\n",
        "\n",
        "    Args:\n",
        "        posts_df: DataFrame with a \"Title\" column.\n",
        "        prompt: Instruction text placed in front of the corpus.\n",
        "        max_corpus_tokens: Maximum number of NLTK word tokens kept from the\n",
        "            joined titles. These are NLTK tokens, not model tokens.\n",
        "        engine: Completion model name passed to openai.Completion.create.\n",
        "        max_tokens: Completion length limit passed to the API.\n",
        "        temperature: Sampling temperature passed to the API.\n",
        "\n",
        "    Returns:\n",
        "        The text of the first completion choice, which is also printed.\n",
        "    \"\"\"\n",
        "    # NOTE(review): OpenAI has announced the retirement of text-davinci-003;\n",
        "    # confirm the model is still served or pass a different `engine`.\n",
        "\n",
        "    # Join all titles into one string; str() guards against non-text cells.\n",
        "    corpus = ' '.join(posts_df['Title'].astype(str).tolist())\n",
        "\n",
        "    # Tokenize and keep at most max_corpus_tokens tokens so the request\n",
        "    # leaves room for the instruction and the completion.\n",
        "    tokens = word_tokenize(corpus)[:max_corpus_tokens]\n",
        "    corpus = ' '.join(tokens)\n",
        "    full_prompt = f\"{prompt} \\n Corpus: {corpus} \\n Readout:\"\n",
        "\n",
        "    # Send the prompt to the OpenAI API\n",
        "    response = openai.Completion.create(\n",
        "        engine=engine,\n",
        "        prompt=full_prompt,\n",
        "        max_tokens=max_tokens,\n",
        "        n=1,\n",
        "        stop=None,\n",
        "        temperature=temperature,\n",
        "    )\n",
        "\n",
        "    # Display the answer from GPT-3\n",
        "    answer = response[\"choices\"][0][\"text\"]\n",
        "    print(answer)\n",
        "    return answer\n",
        "\n",
        "\n",
        "def run_prompt(prompt, posts_df=None):\n",
        "    \"\"\"Run a single analysis prompt against the Reddit corpus.\n",
        "\n",
        "    Args:\n",
        "        prompt: Instruction text to send together with the corpus.\n",
        "        posts_df: Already scraped posts to analyse. When omitted, the posts\n",
        "            are fetched with get_posts using the notebook-level `keyword`,\n",
        "            `num_results` and `date_range`.\n",
        "\n",
        "    Returns:\n",
        "        Tuple of (prompt, response text).\n",
        "    \"\"\"\n",
        "    if posts_df is None:\n",
        "        posts_df = get_posts(keyword, num_results, date_range)\n",
        "    response = GPT3_evaluation(posts_df, prompt)\n",
        "    return prompt, response\n",
        "\n",
        "# Scrape Reddit once and share the frame with every worker. Fetching inside\n",
        "# each worker repeated the same search per prompt and had all threads writing\n",
        "# redditposts.csv at the same time.\n",
        "shared_posts = get_posts(keyword, num_results, date_range)\n",
        "\n",
        "results = []\n",
        "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
        "    future_to_prompt = {\n",
        "        executor.submit(run_prompt, prompt, shared_posts): prompt\n",
        "        for prompt in prompts\n",
        "    }\n",
        "    for future in concurrent.futures.as_completed(future_to_prompt):\n",
        "        prompt = future_to_prompt[future]\n",
        "        try:\n",
        "            results.append(future.result())\n",
        "        except Exception as exc:\n",
        "            # Report the failing prompt and keep collecting the others.\n",
        "            print(f'{prompt} generated an exception: {exc}')\n",
        "\n",
        "GPT3answers = pd.DataFrame(results, columns=[\"Prompt\", \"Answer from GPT-3\"])\n",
        "GPT3answers.to_csv(\"GPT3_readout.csv\")\n"
      ],
      "metadata": {
        "id": "bSJR46ga5rTk"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}